source("mergeAllData.R", local = knitr::knit_global())
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
## here() starts at /Users/amelia/github/MHWsim-expt
## Rows: 1425 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): genet
## dbl (7): tank, avg_size, sd_size, avg_biomass_g, sd_biomass_g, n_polyps_sam...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3000 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Date, Genet, Comments
## dbl (7): Tank, Alive, Dying/Dead, Removed, Floating, N, Splitting
## time (1): Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2925 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Date, Genet, Comments
## dbl (5): Tank, Open, Partial open, Partial closed, Closed
## time (1): Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#source("readRPiData.R", local = knitr::knit_global()) # Load temperature data AFTER you have uncommented and run this line
weekly_temp <- read_csv(here("experiment", "data", "weekly_temperature.csv"))
## Rows: 300 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): treatment
## dbl (4): tank, avg_temp, min_temp, max_temp
## date (1): friday
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
A_matrix <- matrix(rep(c("A", "B", "C", "D", "E"), times = 15), nrow = 75, ncol = 1)
temp_covariate <- weekly_temp %>%
filter(friday < ymd("2024-02-16")) %>%
arrange(friday, tank) %>%
slice(rep(1:n(), each = 5)) %>%
mutate(tank_rep = paste0(tank, letters[1:5][rep(1:5, times = n()/5)])) %>%
select(tank_rep, friday, avg_temp) %>%
pivot_wider(names_from = tank_rep, values_from = avg_temp) %>%
mutate(week = row_number()) %>%
select(-friday) %>%
column_to_rownames(var = "week") %>%
as.matrix() %>%
t()
C_matrix <- matrix(factor(rep(1:15, each = 5)), ncol = 1)
Z_models <- list(
H1 = matrix(factor(rep(1, 75))),
H2 = matrix(factor(c(rep("amb", 25), rep("sev", 25), rep("ext", 25)))),
H3 = matrix(factor(rep(c("A", "B", "C", "D", "E"), 15))),
H4 = matrix(factor(c(rep("amb", 25), rep("mhw", 50)))),
H5 = matrix(factor(c(rep("amb", 25), rep("sev", 25), rep("amb", 25)))), # post-hoc hypothesis
H6 = matrix(factor(rep(c("gen1", "gen2", "gen2", "gen2", "gen1"), 15))), # post-hoc hypothesis
H7 = matrix(factor(rep(c("gen1", "gen2", "gen3", "gen4", "gen1"), 15))), # post-hoc hypothesis
H8 = matrix(factor(c(rep(c("amb_gen1", "amb_gen2", "amb_gen2", "amb_gen2", "amb_gen1"), 5),
rep(c("sev_gen1", "sev_gen2", "sev_gen2", "sev_gen2", "sev_gen1"),5),
rep(c("amb_gen1", "amb_gen2", "amb_gen2", "amb_gen2", "amb_gen1"), 5))))) # post-hoc hypothesis
#H9 = matrix(factor(1:75)) #Do not use! Overfits the data and has by far the worst AIC
names(Z_models) <- c("all_same", "diff_treat", "diff_genet", "mhw_same", "amb_ext_same", "AE_BCD", "AE_B_C_D", "ambExt_AE_BCD") #, "all_diff")
mod_list <- list(
B = "identity", #pretty sure this is right, random walk diagonal and unequal has worse AIC/BIC
U = "zero", #unequal has worse AIC/BIC
Q = "diagonal and equal", #All tanks have same mixing; random walk diagonal and unequal has worse AIC/BIC
Z = "placeholder",
A = A_matrix,
R = "diagonal and equal", #errors are independent and equally distributed
C = C_matrix,
c = temp_covariate)
all_size <- all %>%
filter(!is.na(avg_size)) %>%
mutate(week = as.numeric(difftime(date, min(weekly_temp$friday), units = "weeks")) + 1,
avg_size_log = log(avg_size),
sd_size_log = log(sd_size),
tank = as.numeric(tank)) %>%
select(date, week, tank, mhw, genet, avg_size_log) %>%
left_join(weekly_temp, by = c("date" = "friday", "tank"))
all_size_marss <- all_size %>%
mutate(genet_tank = paste(genet, tank, sep = "_")) %>%
select(-date, -tank, -genet, -mhw, -treatment, -avg_temp, -min_temp, -max_temp) %>%
pivot_wider(names_from = genet_tank, values_from = avg_size_log) %>%
column_to_rownames(var = "week") %>%
as.matrix() %>%
t()
marss_data <- all_size_marss
out.tab <- NULL
fits <- list()
for (i in 1:length(Z_models)) {
mod_list$Z <- Z_models[[i]]
fit <- MARSS(y=marss_data, model = mod_list, silent = TRUE,
control = list(maxit = 5000))
out <- data.frame(H = names(Z_models)[i], logLik = fit$logLik,
AICc = fit$AICc, num.param = fit$num.params, m = length(unique(Z_models[[i]])),
num.iter = fit$numIter, converged = !fit$convergence)
out.tab <- rbind(out.tab, out)
fits <- c(fits, list(fit))
}
print(out.tab) #looking for: higher log likelihood, lower AIC
## H logLik AICc num.param m num.iter converged
## 1 all_same 1588.865 -3128.874 24 1 61 TRUE
## 2 diff_treat 1635.665 -3218.325 26 3 61 TRUE
## 3 diff_genet 1616.143 -3175.123 28 5 303 TRUE
## 4 mhw_same 1623.802 -3196.676 25 2 61 TRUE
## 5 amb_ext_same 1623.543 -3196.156 25 2 61 TRUE
## 6 AE_BCD 1602.623 -3154.317 25 2 274 TRUE
## 7 AE_B_C_D 1615.734 -3176.385 27 4 303 TRUE
## 8 ambExt_AE_BCD 1642.047 -3229.011 27 4 271 TRUE
for (i in 1:length(fits)) {
par(mfrow = c(2, 3))
resids <- MARSSresiduals(fits[[i]], type = "tt1")
for (j in 1:15) {
plot(resids$model.residuals[j, ], ylab = "model residuals",
xlab = "")
abline(h = 0)
title(paste(names(Z_models[i]), ":", rownames(marss_data)[j]))
}
}
# Plots 1 (xtT) & 2 (fitted.ytT): Do fitted values seem reasonable?
# Plot 3 (model.resids.ytt1): Do resids have temporal patterns? Do 95% of resids fall withing the CIs?
# Plot 4 (std.model.resids.ytT): Do resids have temporal patterns? Do 95% of resids fall withing the CIs?
# Plot 5 (std.state.resids.xtT): Any outliers?
# Plots 6 & 7 (qqplot.std.model.resids.ytt1: Are resids normal (straight line)?
# Plot 8 (acf.std.model.resids.ytt1): Do resids have temporal autocorrelation?
The residual plots are not looking great… and they’re essentially the same across all models(??)
the.mean <- apply(marss_data, 1, mean)
the.sigma <- sqrt(apply(marss_data, 1, var))
marss_data_z <- (marss_data - the.mean) * (1/the.sigma)
the.mean <- apply(temp_covariate, 1, mean)
the.sigma <- sqrt(apply(temp_covariate, 1, var))
temp_covariate_z <- (temp_covariate - the.mean) * (1/the.sigma)
mod_list_z <- list(
B = "diagonal and equal", #identity is another option, but models have lower logLik and higher AICc; random walk diagonal and unequal gives same model output
U = "zero", #note: unequal doesn't converge even after 5k iterations
Q = "diagonal and equal", #All tanks have same mixing (also, random walk diagonal and unequal is the same AICc and very close Q estimates)
Z = "placeholder",
A = "zero", #because data have been z-scored
R = "diagonal and equal", #observation errors are independent and have equal variances, because there was only ONE sampler (me)
C = C_matrix,
c = temp_covariate_z)
out_tab_z <- NULL
fits_z <- list()
for (i in 1:length(Z_models)) {
mod_list_z$Z <- Z_models[[i]]
fit <- MARSS(y=marss_data_z, model = mod_list_z, silent = TRUE,
control = list(maxit = 5000))
#print(fit)
out <- data.frame(H = names(Z_models)[i], logLik = fit$logLik,
AICc = fit$AICc, num.param = fit$num.params, m = length(unique(Z_models[[i]])),
num.iter = fit$numIter, converged = !fit$convergence)
out_tab_z <- rbind(out_tab_z, out)
fits_z <- c(fits_z, list(fit))
}
print(out_tab_z) #looking for: higher log likelihood, lower AIC
## H logLik AICc num.param m num.iter converged
## 1 all_same 150.9305 -261.2628 20 1 178 TRUE
## 2 diff_treat 151.1195 -257.5173 22 3 178 TRUE
## 3 diff_genet 151.0769 -253.2966 24 5 178 TRUE
## 4 mhw_same 150.9444 -259.2301 21 2 178 TRUE
## 5 amb_ext_same 151.1080 -259.5573 21 2 178 TRUE
## 6 AE_BCD 151.0703 -259.4820 21 2 178 TRUE
## 7 AE_B_C_D 151.0731 -255.3581 23 4 178 TRUE
## 8 ambExt_AE_BCD 151.2521 -255.7161 23 4 178 TRUE
for (i in 1:length(fits_z)) {
par(mfrow = c(2, 3))
resids_z <- MARSSresiduals(fits_z[[i]], type = "tt1")
for (j in 1:15) {
plot(resids_z$model.residuals[j, ], ylab = "model residuals",
xlab = "")
abline(h = 0)
title(paste(names(Z_models[i]), ":", rownames(marss_data_z)[j]))
}
}
The residual plots are looking better… but they’re still essentially the
same across all models(??)
According to AICc (and logLik), definitely don’t use “all_diff” and can probably also rule out “diff_genet”
mod_list_z <- list(
B = "diagonal and unequal", #identity is another option, but models have lower logLik and higher AICc; random walk diagonal and unequal gives same model output
U = "zero", #note: unequal doesn't converge even after 5k iterations
Q = "diagonal and equal", #All tanks have same mixing (also, random walk diagonal and unequal is the same AICc and very close Q estimates)
Z = "placeholder",
A = "zero", #because data have been z-scored
R = "diagonal and equal", #observation errors are independent and have equal variances, because there was only ONE sampler (me)
C = C_matrix,
c = temp_covariate_z)
out_tab_z <- NULL
fits_z <- list()
for (i in 1:length(Z_models)) {
mod_list_z$Z <- Z_models[[i]]
fit <- MARSS(y=marss_data_z, model = mod_list_z, silent = TRUE,
control = list(maxit = 5000))
#print(fit)
out <- data.frame(H = names(Z_models)[i], logLik = fit$logLik,
AICc = fit$AICc, num.param = fit$num.params, m = length(unique(Z_models[[i]])),
num.iter = fit$numIter, converged = !fit$convergence)
out_tab_z <- rbind(out_tab_z, out)
fits_z <- c(fits_z, list(fit))
}
print(out_tab_z) #looking for: higher log likelihood, lower AIC
## H logLik AICc num.param m num.iter converged
## 1 all_same 150.9305 -261.2628 20 1 178 TRUE
## 2 diff_treat 151.1195 -257.5173 22 3 178 TRUE
## 3 diff_genet 151.0769 -253.2966 24 5 178 TRUE
## 4 mhw_same 150.9444 -259.2301 21 2 178 TRUE
## 5 amb_ext_same 151.1080 -259.5573 21 2 178 TRUE
## 6 AE_BCD 151.0703 -259.4820 21 2 178 TRUE
## 7 AE_B_C_D 151.0731 -255.3581 23 4 178 TRUE
## 8 ambExt_AE_BCD 151.2521 -255.7161 23 4 178 TRUE
for (i in 1:length(fits_z)) {
par(mfrow = c(2, 3))
resids_z <- MARSSresiduals(fits_z[[i]], type = "tt1")
for (j in 1:15) {
plot(resids_z$model.residuals[j, ], ylab = "model residuals",
xlab = "")
abline(h = 0)
title(paste(names(Z_models[i]), ":", rownames(marss_data_z)[j]))
}
}
temp_z <- weekly_temp %>%
filter(friday < ymd("2024-02-16")) %>%
arrange(friday, tank) %>%
select(tank, friday, avg_temp) %>%
group_by(tank) %>%
mutate(avg_temp_z = (avg_temp - mean(avg_temp, na.rm = TRUE)) /sd(avg_temp, na.rm = TRUE),
treatment = ifelse(tank < 6, "ambient",
ifelse(tank > 10, "extreme",
"severe")),
tank = as.factor(tank)) %>%
ungroup()
ggplot(temp_z, aes(x = friday, y = avg_temp_z, color = tank)) +
geom_line() +
labs(title = "Z-scored average temperature by tank",
x = "Date",
y = "Z-scored average temperature") +
theme_minimal() +
facet_wrap(~treatment)
#Each treatment different
Z_matrix2 <- matrix(factor(c(rep(c(1, 0, 0), 25),
rep(c(0, 1, 0), 25),
rep(c(0, 0, 1), 25))),
nrow = 75, byrow = TRUE)
# Combine the blocks into a single 75x3 matrix
matrix_data <- rbind(matrix(c(rep(1:5, each = 5), rep(0, 25), rep(0, 25)), nrow = 25, ncol = 3, byrow = FALSE),
matrix(c(rep(0, 25), rep(6:10, each = 5), rep(0, 25)), nrow = 25, ncol = 3, byrow = FALSE),
matrix(c(rep(0, 25), rep(0, 25), rep(10:14, each = 5)), nrow = 25, ncol = 3, byrow = FALSE))
C_matrix2 <- matrix(factor(rep(1:15, each = 5)), ncol = 3)
C_matrix2 <- matrix(0, nrow = 75, ncol = 3)
for (i in 1:3) {
C_matrix2[((i-1)*5 + 1):(i*5), i] <- 1
}
C_matrix2 <- matrix(factor(C_matrix2), nrow = 75, ncol = 3)
model_list <- list(
B = "identity",
U = "zero",
Q = "diagonal and equal",
Z = Z_matrix2,
A = A_matrix,
R = "diagonal and equal",
C = matrix_data,
c = temp_covariate)
# non 75x1 Z matrix Does not run
#fit1 <- MARSS(y=marss_data, model = model_list,
# control = list(maxit = 1000))